module d_tree_sitter.parser;

// Every declaration below this line gets C linkage.
extern (C):

import d_tree_sitter.language;
import d_tree_sitter.tree;
import d_tree_sitter.tree_visitor;
import d_tree_sitter.tree_printer;
import d_tree_sitter.libc : TSTree;

import std.typecons : Nullable;
import std.format : format;
import std.string : fromStringz, toStringz;
14 
/** A stateful object that is used to produce a `Tree` based on some source code */
struct Parser
{
  import d_tree_sitter.libc : TSParser, ts_parser_new, ts_parser_delete,
    ts_parser_language, ts_parser_set_language, ts_parser_logger, TSLogger,
    ts_parser_print_dot_graphs, ts_parser_parse, ts_parser_parse_string,
    ts_parser_parse_string_encoding, TSInput, TSInputEncoding;
  import std.stdio : File;

  /** internal TSParser (owned by this struct; released in the destructor) */
  TSParser* tsparser;

  /** Create a new Parser for the given language.
      NOTE: It assumes that the language is compatible (uses `set_language_nothrow`),
      so no exception is thrown; an incompatible language only trips an `assert`.
      Params:
        language = the language you want to create a parser for
  */
  this(in Language language) nothrow @nogc
  {
    // Create a parser.
    this.tsparser = ts_parser_new();

    // Set the parser's language.
    const success = this.set_language_nothrow(language);
    assert(success, "ts_parser_set_language rejected the given language");
  }

  // A Parser always wraps exactly one TSParser: no default construction, no copies.
  @disable this();
  @disable this(this);

  /**
    Release the underlying TSParser.

    Does nothing when `tsparser` is null (for example a moved-from or
    `Parser.init` value): `stop_printing_dot_graphs` forwards the pointer to the
    C library, which is not documented to accept a null parser.
  */
  ~this() @nogc nothrow
  {
    if (this.tsparser is null)
    {
      return;
    }

    // Detach any dot-graph destination before the parser goes away.
    stop_printing_dot_graphs();
    ts_parser_delete(this.tsparser);

    // Make a repeated destructor call harmless.
    this.tsparser = null;
  }

  /**
   * Set the language that the parser should use for parsing.
   *
   * NOTE it assumes that the language is compatible; no version check is done here.
   *
   * Returns: a boolean indicating whether or not the language was successfully
   * assigned.
   */
  auto set_language_nothrow(in Language language) nothrow
  {
    return ts_parser_set_language(tsparser, language.tslanguage);
  }

  /**
   * Set the language that the parser should use for parsing.
   *
   * The language version is validated first with `enforce_compatible_language`.
   *
   * Returns: a boolean indicating whether or not the language was successfully
   * assigned. True means assignment succeeded. False means there was a version
   * mismatch: the language was generated with an incompatible version of the
   * Tree-sitter CLI. Check the language's version using `ts_language_version`
   * and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and
   * `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants.
   *
   * Throws: `Exception` if the language version is outside the supported range.
   */
  auto set_language(in Language language)
  {
    // TODO make set_language private?
    enforce_compatible_language(language);

    // Same C call as the unchecked variant; delegate so there is a single call site.
    return set_language_nothrow(language);
  }

  /**
    Throws an error if the version of the given language is not compatible.

    Params:
      language = the language whose version is checked
    Throws: `Exception` when the version is below
      `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` or above
      `TREE_SITTER_LANGUAGE_VERSION`.
  */
  void enforce_compatible_language(Language language) const
  {
    import d_tree_sitter.libc : TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION,
      TREE_SITTER_LANGUAGE_VERSION;

    const language_version = language.get_version();
    const too_old = language_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION;
    const too_new = language_version > TREE_SITTER_LANGUAGE_VERSION;
    if (too_old || too_new)
    {
      throw new Exception(
          format!"Incompatible language version %d. Expected minimum %d, maximum %d"(language_version,
          TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION, TREE_SITTER_LANGUAGE_VERSION));
    }
  }

  /**
    Get the parser's current language.

    Returns: a `Nullable!Language` that is null when the C library reports no
    language for this parser.
  */
  auto language() const @nogc nothrow
  {
    auto ptr = ts_parser_language(tsparser);
    if (!ptr)
    {
      return Nullable!Language.init;
    }
    return Nullable!Language(Language(ptr));
  }

  /**
    Get the parser's current logger.

    Returns: the `payload` field of the `TSLogger` reported by
    `ts_parser_logger`, reinterpreted as a `TSLogger*`.
  */
  TSLogger* logger() const @nogc nothrow
  {
    auto current = ts_parser_logger(tsparser);
    return cast(TSLogger*) current.payload;
  }

  // TODO
  // set_logger

  /**
    Set the destination to which the parser should write debugging graphs
    during parsing. The graphs are formatted in the DOT language. You may want
    to pipe these graphs directly to a `dot(1)` process in order to generate
    SVG output.

    Params:
      file = the file whose descriptor (`file.fileno`) is handed to the C library
  */
  auto print_dot_graphs(File file)
  {
    // TODO is file.fileno a raw fd?!
    // NOTE(review): the descriptor is shared between `file` and the C library
    // here. If the library closes it when graph printing stops, `file` would be
    // closed a second time by D - confirm ownership, and duplicate the
    // descriptor first if needed.
    return ts_parser_print_dot_graphs(tsparser, file.fileno());
  }

  /** Stop the parser from printing debugging graphs while parsing. */
  auto stop_printing_dot_graphs() @nogc nothrow
  {
    // A negative descriptor is the C API's "no destination" value.
    return ts_parser_print_dot_graphs(tsparser, -1);
  }

  /**
    Use the parser to parse some source code and create a syntax tree.

    If you are parsing this document for the first time, leave `old_tree` at
    its default. Otherwise, if you have already parsed an earlier
    version of this document and the document has since been edited, pass the
    previous syntax tree so that the unchanged parts of it can be reused.
    This will save time and memory. For this to work correctly, you must have
    already edited the old syntax tree using the `ts_tree_edit` function in a
    way that exactly matches the source code changes.

    The `TSInput` parameter lets you specify how to read the text. It has the
    following three fields:
    1. `read`: A function to retrieve a chunk of text at a given byte offset
       and (row, column) position. The function should return a pointer to the
       text and write its length to the `bytes_read` pointer. The parser does
       not take ownership of this buffer; it just borrows it until it has
       finished reading it. The function should write a zero value to the
       `bytes_read` pointer to indicate the end of the document.
    2. `payload`: An arbitrary pointer that will be passed to each invocation
       of the `read` function.
    3. `encoding`: An indication of how the text is encoded. Either
       `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.

    This function returns a syntax tree on success, and `null` on failure. There
    are three possible reasons for failure:
    1. The parser does not have a language assigned. Check for this using the
      `ts_parser_language` function.
    2. Parsing was cancelled due to a timeout that was set by an earlier call to
       the `ts_parser_set_timeout_micros` function. You can resume parsing from
       where the parser left off by calling `ts_parser_parse` again with the
       same arguments. Or you can start parsing from scratch by first calling
       `ts_parser_reset`.
    3. Parsing was cancelled using a cancellation flag that was set by an
       earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing
       from where the parser left off by calling `ts_parser_parse` again with
       the same arguments.
   */
  auto parse(TSInput input, const TSTree* old_tree = Tree.create_empty()) @nogc nothrow
  {
    return ts_parser_parse(tsparser, old_tree, input);
  }

  /**
    Use the parser to parse some source code stored in one contiguous buffer.

    Params:
      source_code = the text to parse; its pointer and its length in bytes are
                    handed to `ts_parser_parse_string`
      old_tree    = a previously parsed (and edited) tree to reuse, as described
                    for the `TSInput` overload of `parse`
    Returns: whatever `ts_parser_parse_string` returns (see the failure cases
      listed for the `TSInput` overload).
   */
  auto parse(const string source_code, const TSTree* old_tree = Tree.create_empty()) nothrow
  {
    // The C API takes an explicit byte length, so no NUL terminator (and hence
    // no `toStringz` copy) is needed: pass the slice's own memory.
    assert(source_code.length <= uint.max, "source_code is too large for a 32-bit length");
    const source_code_length = cast(uint) source_code.length;
    return ts_parser_parse_string(tsparser, old_tree, source_code.ptr, source_code_length);
  }

  /**
    Use the parser to parse some source code stored in one contiguous buffer with
    a given encoding.

    Params:
      source_code = the text to parse; its pointer and its length in bytes are
                    handed to `ts_parser_parse_string_encoding`
      encoding    = whether the text is encoded as UTF8 or UTF16
      old_tree    = a previously parsed (and edited) tree to reuse
    Returns: whatever `ts_parser_parse_string_encoding` returns.
   */
  auto parse(const string source_code, const TSInputEncoding encoding,
      const TSTree* old_tree = Tree.create_empty()) nothrow
  {
    // Explicit byte length is passed, so the buffer does not need to be NUL-terminated.
    assert(source_code.length <= uint.max, "source_code is too large for a 32-bit length");
    const source_code_length = cast(uint) source_code.length;
    return ts_parser_parse_string_encoding(tsparser, old_tree, source_code.ptr,
        source_code_length, encoding);
  }

  /**
    Parse the given source_code that is in utf8 encoding

    Params:
      source_code = UTF-8 text to parse
      old_tree    = a previously parsed (and edited) tree to reuse
  */
  auto parse_utf8(const string source_code, const TSTree* old_tree = Tree.create_empty()) nothrow
  {
    return parse(source_code, TSInputEncoding.TSInputEncodingUTF8, old_tree);
  }

  /**
    Parse the given source_code that is in utf16 encoding

    Params:
      source_code = UTF-16 text to parse
      old_tree    = a previously parsed (and edited) tree to reuse
  */
  auto parse_utf16(const wstring source_code, const TSTree* old_tree = Tree.create_empty()) nothrow @nogc
  {
    // The C API receives the buffer as bytes, so reinterpret the code units...
    const source_code_c = cast(const char*) source_code.ptr;

    // ...and give the length in bytes, not in UTF-16 code units; passing
    // `source_code.length` alone would cover only the first half of the text.
    const byte_count = source_code.length * wchar.sizeof;
    assert(byte_count <= uint.max, "source_code is too large for a 32-bit length");

    return ts_parser_parse_string_encoding(tsparser, old_tree, source_code_c,
        cast(uint) byte_count, TSInputEncoding.TSInputEncodingUTF16);
  }

  /**
        Get the S-expression of the given source code
        Params:
            source_code =     the given source code as a string
        Returns: the parsed S-expression
       */
  auto s_expression(const string source_code) nothrow
  {
    auto tree = Tree(parse(source_code));

    // Get the root node of the syntax tree.
    auto root_node = tree.root_node();

    // Print the syntax tree as an S-expression.
    return root_node.to_string();
  }

  /**
    Traverse the [Tree] starting from its root [Node] applying a visitor at all nodes.

    Params:
      source_code = the source code to parse before traversing
      visitor     = the visitor handed to the root node's `traverse`
  */
  void traverse(const string source_code, TreeVisitor visitor)
  {
    auto tree = Tree(parse(source_code));

    // Get the root node of the syntax tree.
    auto root_node = tree.root_node();

    root_node.traverse(visitor);
  }

  /**
    Traverse the `Tree` starting from its root `Node` and print information about each
    node using a `TreePrinter`.

    Params:
      source_code = the source code to parse and describe
    Returns: the text accumulated in the printer's `tree_string`
  */
  string traverse_print(const string source_code) @trusted
  {
    auto tree = Tree(parse(source_code));

    // Get the root node of the syntax tree.
    auto root_node = tree.root_node();

    // a visitor to print information
    auto visitor = new TreePrinter(source_code);

    root_node.traverse(visitor);

    return cast(string) visitor.tree_string; // convert bc.string.String to string
  }
}